# Redirect console output (not messages) to a log file, overwriting any
# existing log. The bare sink() call at the end of the script undoes this.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
sink("uptake.compliance-psrm-log.txt",append=FALSE,type="output")

##################
####Toolbar Uptake by Respondent Characteristics
##################
library(plyr)
library(SDMTools)
library(xtable)
library(apsrtable)
library(zoo)

# Linearly rescale a numeric vector onto the [0, 1] interval.
#
# data: numeric vector. Missing values are ignored when locating the range
#       and remain missing in the result.
#
# Returns a vector the same length as `data` in which the smallest observed
# value maps to 0 and the largest to 1. Two degenerate inputs are handled
# explicitly instead of producing NaN or min()/max() warnings:
#   * no observed values (empty or all-NA input): `data` is returned as is;
#   * all observed values identical (zero range): observed values map to 0.
rescale <- function(data){
	observed <- data[!is.na(data)]
	if (length(observed) == 0) {
		# min()/max() of an empty set warn and return Inf/-Inf.
		return(data)
	}
	minimum.vec <- min(observed)
	value.range <- max(observed) - minimum.vec
	shifted <- data - minimum.vec
	if (isTRUE(value.range == 0)) {
		# Dividing by a zero range would turn every value into 0/0 = NaN.
		return(shifted)
	}
	shifted/value.range
}

# Load the saved workspaces. The code below expects them to supply the
# `pre.survey` and `page.views.summary` data frames.
load('pre.survey-psrm.RData')
pre.survey$rep.partisanship <- NULL
load('page.views.summary-psrm.RData')

# Collapse the page-view summary to one total per respondent, then attach the
# survey covariates (merge keeps only caseids present in both frames).
page.views.total <- ddply(
	page.views.summary, .(caseid), summarise,
	totalviews = sum(totalviews, na.rm = TRUE)
)
amount.comparison <- merge(pre.survey, page.views.total, by = 'caseid')

# Both models share one right-hand side; only the outcome is transformed.
views.formula <- totalviews ~ dem.partisanship.3pt + ind.partisanship.3pt +
	age + female + black + hispanic + college.plus + some.college
log.views.formula <- update(views.formula, log(.) ~ .)

demoviews.1 <- lm(views.formula, data = amount.comparison)
demoviews.2 <- lm(log.views.formula, data = amount.comparison)

#Table F1 Here
apsrtable(
	demoviews.1, demoviews.2,
	model.names = c('Political Pageviews', 'Log(Political Pageviews)'),
	coef.names = c(
		'(Intercept)', 'Democrat', 'Independent', 'Age', 'Female', 'Black',
		'Hispanic', 'College/Graduate Degree', 'Some College'
	)
)


# Build one row for every (respondent, date) combination so that days with no
# page-view record for a respondent are still represented.
full.frame <- expand.grid(
	caseid = unique(pre.survey$caseid),
	date = unique(page.views.summary$date)
)

# Left-join the observed page views; unmatched pairs get NA in totalviews.
full.frame <- merge(full.frame, page.views.summary, by = c("caseid", "date"), all.x = TRUE)

# 1 when at least one view was recorded that day, 0 otherwise (including
# pairs with no page-view record at all).
has.views <- !is.na(full.frame$totalviews) & full.frame$totalviews > 0
full.frame$active.user.day <- as.numeric(has.views)

# Attach the survey covariates and store the date as a Date object.
full.frame <- merge(full.frame, pre.survey, by = "caseid", all.x = TRUE)
full.frame$date <- as.Date(full.frame$date, "%Y-%m-%d")

# Respondent-day rows on which any activity was recorded.
was.active <- full.frame$active.user.day == 1

# Respondents active on at least one day, and all of their respondent-day rows.
active.at.point <- unique(full.frame$caseid[which(was.active)])
active.frame <- full.frame[full.frame$caseid %in% active.at.point, , drop = FALSE]

# Respondents active after 2016-10-31 and before 2016-08-08, respectively.
# active.early is computed here but not referenced again in this script.
active.lately <- unique(full.frame$caseid[which(was.active & full.frame$date > '2016-10-31')])
active.early <- unique(full.frame$caseid[which(was.active & full.frame$date < '2016-08-08')])

# Flag survey respondents who belong to the late-active set (1) or not (0).
pre.survey$late.active <- as.numeric(pre.survey$caseid %in% active.lately)

# Weighted share (weights: weight_pulse) of the respondents in active.frame
# who were active on each date.
visits <- ddply(active.frame,.(date),summarise,active.user.day=weighted.mean(x=active.user.day,w=weight_pulse))

#Active by Day
# Figure A2: 7-day rolling mean of the daily active share. The ends of the
# series, where a full window is unavailable, are filled with NA.
pdf(file="figure-a2.pdf",height=4,width=5)
# The column is addressed by its full name. The shortened `visits$active.user`
# resolved only through `$` partial matching and would silently return NULL
# if a second column with the same prefix were ever added to `visits`.
plot(x=visits$date,y=rollmean(visits$active.user.day,7,fill=list(NA,NULL,NA)),type='l',main="% Initial Sample Active On Web By Day",las=1,yaxt='n',xlab="Date",ylim=c(.5,1),ylab="",lwd=3)
axis(side=2,at=c(.5,.6,.7,.8,.9,1),labels=c("50%","60%","70%","80%","90%","100%"),las=1)
dev.off()

# Map each of the listed survey measures onto the 0-1 range with rescale(),
# overwriting the original column in pre.survey.
scaled.vars <- c(
	"income", "age", "political.interest", "party.7pt",
	"obama.therm", "clinton.therm", "trump.therm", "tea.party.therm",
	"ideology", "syria.refugees", "abortion"
)
for (scaled.var in scaled.vars) {
	pre.survey[[scaled.var]] <- rescale(pre.survey[[scaled.var]])
}

# `wakoopa`: respondents whose weight_pulse is not missing, without the
# bookkeeping columns.
has.pulse.weight <- !is.na(pre.survey$weight_pulse)
wakoopa <- pre.survey[has.pulse.weight, , drop = FALSE]
wakoopa.drop <- c("Pulse_Flag", "weight_full", "caseid")
wakoopa <- wakoopa[, !(names(wakoopa) %in% wakoopa.drop), drop = FALSE]

# `wakoopa.late`: the subset flagged late.active == 1. The flag is only needed
# to make this split, so it is removed from both frames afterwards.
wakoopa.late <- wakoopa[which(wakoopa$late.active == 1), , drop = FALSE]
wakoopa.late$late.active <- NULL
wakoopa$late.active <- NULL

# Full panel: copy weight_full into weight_pulse so all three frames expose
# their weights under the same column name, then drop the bookkeeping columns.
pre.survey$weight_pulse <- pre.survey$weight_full
panel.drop <- c("weight_full", "Pulse_Flag", "late.active", "caseid")
pre.survey <- pre.survey[, !(names(pre.survey) %in% panel.drop), drop = FALSE]

# Variables compared across the three samples, in the order they are plotted.
var.comparison <- c(
	"income","age","female","black","hispanic","white","other.race",
	"high.school.less","some.college","college.plus",
	"northeast","south","midwest","west",
	"party.7pt","dem.partisanship.3pt","rep.partisanship.3pt","ind.partisanship.3pt","strong.partisan",
	"liberal","conservative","moderate",
	"primary.turnout","general.turnout",
	"syria.refugees","strong.syria","abortion","strong.abortion",
	"tea.party.therm","trump.therm","obama.therm","clinton.therm",
	"clinton.post"
)

# Weighted mean, weighted variance and number of non-missing observations of
# each variable in `vars`, using the `weight_pulse` column of `frame` as weights.
#
# frame: data frame holding the variables and a `weight_pulse` column.
# vars:  character vector of column names.
#
# Returns a list with elements `mean`, `variance` (both numeric) and `n`
# (integer), each of length(vars). Result vectors are preallocated and the
# loop uses seq_along(), so an empty `vars` yields empty vectors instead of
# iterating over c(1, 0).
weighted.summary <- function(frame, vars){
	means <- numeric(length(vars))
	variances <- numeric(length(vars))
	sizes <- integer(length(vars))
	for(k in seq_along(vars)){
		values <- frame[,vars[k]]
		observed <- which(!is.na(values))
		means[k] <- weighted.mean(x=values,w=frame$weight_pulse,na.rm=TRUE)
		# wt.var is given only the rows with an observed value, paired with their weights.
		variances[k] <- wt.var(x=values[observed],wt=frame$weight_pulse[observed])
		sizes[k] <- length(observed)
	}
	list(mean=means,variance=variances,n=sizes)
}

panel.summary <- weighted.summary(pre.survey,var.comparison)
wakoopa.summary <- weighted.summary(wakoopa,var.comparison)
wakoopa.last.summary <- weighted.summary(wakoopa.late,var.comparison)

# Unpack into the vectors used to build the summary frames.
weighted.proportion.panel <- panel.summary$mean
weighted.proportion.wakoopa <- wakoopa.summary$mean
weighted.proportion.wakoopa.last <- wakoopa.last.summary$mean
weighted.variance.panel <- panel.summary$variance
weighted.variance.wakoopa <- wakoopa.summary$variance
weighted.variance.wakoopa.last <- wakoopa.last.summary$variance
panel.sample <- panel.summary$n
wakoopa.sample <- wakoopa.summary$n
wakoopa.last.sample <- wakoopa.last.summary$n

# One summary frame per sample, all with the same layout:
# names (variable name), mean, variance, sample.size.
panel.proportion <- data.frame(
	names = var.comparison,
	mean = weighted.proportion.panel,
	variance = weighted.variance.panel,
	sample.size = panel.sample,
	stringsAsFactors = FALSE
)

wakoopa.proportion <- data.frame(
	names = var.comparison,
	mean = weighted.proportion.wakoopa,
	variance = weighted.variance.wakoopa,
	sample.size = wakoopa.sample,
	stringsAsFactors = FALSE
)

wakoopa.last.proportion <- data.frame(
	names = var.comparison,
	mean = weighted.proportion.wakoopa.last,
	variance = weighted.variance.wakoopa.last,
	sample.size = wakoopa.last.sample,
	stringsAsFactors = FALSE
)

#############
####Initial Survey to Toolbar Sample
#############

# Compare the toolbar sample (wakoopa.proportion) with the full panel
# (panel.proportion), variable by variable. The calculation is vectorised over
# the rows of the two summary frames, which hold the same variables in the
# same order, so no index loop or incrementally grown vectors are needed.
category <- as.character(panel.proportion$names)
panel.value <- panel.proportion$mean
wakoopa.value <- wakoopa.proportion$mean
difference.value <- wakoopa.value - panel.value

# Never filled by this script; kept so the set of objects it defines is unchanged.
difference.standard.error <- NA

# Standard error of the difference: sqrt(var1/n1 + var2/n2).
panel.standard.error <- sqrt( (wakoopa.proportion$variance/wakoopa.proportion$sample.size) + (panel.proportion$variance/panel.proportion$sample.size))
ttest <- difference.value/panel.standard.error

# Degrees of freedom are taken as the smaller of the two sample sizes.
df.smaller.sample <- pmin(panel.proportion$sample.size,wakoopa.proportion$sample.size)
# Two-sided p-value from the upper tail directly; this equals
# 2*(1 - pt(|t|, df)) without the cancellation that makes it exactly 0 for large |t|.
p.value <- 2*pt(q=abs(ttest),df=df.smaller.sample,lower.tail=FALSE)

out.frame <- data.frame(
	category = category,
	panel = round(panel.value,2),
	wakoopa = round(wakoopa.value,2),
	difference = round(difference.value,2),
	difference.se = round(panel.standard.error,2),
	t.stat = round(ttest,digits=2),
	p.value = round(p.value,digits=2)
)

# Row labels for the difference plots, in the same order as the rows of out.frame.
var.labels <- c(
	"Income","Age","Female","Black","Hispanic","White","Other Race",
	"High School/Less","Some College","College/More",
	"Northeast","South","Midwest","West",
	"PID Scale (7-pt)","Democrat","Republican","Independent","Strong Partisan",
	"Liberal","Conservative","Moderate",
	"Primary Turnout","General Turnout",
	"Refugees","Strong Stance","Abortion","Strong Stance",
	"Tea Party","Trump","Obama","Clinton",
	"Clinton Vote"
)

# Figure A1: one row per variable, first variable at the top.
row.y <- length(out.frame$difference):1
# Heavy horizontal rules separating the variable groups named on the right axis.
group.breaks <- c(30.5,26.5,23.5,19.5,14.5,11.5,9.5,5.5,1.5)

pdf(file='figure-a1.pdf',height=10,width=9.5)
par(mar=c(5,8,4,6.75))
plot(
	x = out.frame$difference, y = row.y,
	xlim = c(-.3,.3), pch = 16, yaxt = 'n', ylab = '',
	xlab = "Difference (Toolbar Sample - Initial YouGov Sample)
Standard Deviations for Non-Binary Variables",
	main = "Differences in Toolbar Sample Relative to YouGov Sample",
	cex = 1.5, cex.main = 2, cex.axis = 1.2, cex.lab = 1.2
)
# Interval of +/- 2 standard errors around each point.
segments(
	y0 = row.y, y1 = row.y,
	x0 = out.frame$difference + 2*out.frame$difference.se,
	x1 = out.frame$difference - 2*out.frame$difference.se,
	lwd = 4
)
axis(side=2, at=row.y, labels=var.labels, las=1)
abline(v=0, lty=2, lwd=2)
# Dotted guide line through every row, then the group separators.
segments(y0=row.y, y1=row.y, x0=-1.5, x1=1.5, lty=3)
segments(y0=group.breaks, y1=group.breaks, x0=-1.5, x1=1.5, lwd=2.5)
axis(
	side = 4,
	at = c(1,3.5,7.5,10.5,13,17,21.5,25,28.5,32),
	labels = c('Vote','Therm','Issue','Turnout','Ideology','Partisan','Region','Education','Race','Demos'),
	las = 1, cex.axis = 1.2
)
dev.off()

#############
####Toolbar Early/Late Comparison
#############

# Compare the late-active toolbar respondents (wakoopa.last.proportion) with
# the whole toolbar sample (wakoopa.proportion), variable by variable. The
# calculation is vectorised over the rows of the two summary frames, which
# hold the same variables in the same order, so no index loop or
# incrementally grown vectors are needed.
category <- as.character(panel.proportion$names)
wakoopa.value <- wakoopa.proportion$mean
wakoopa.late.value <- wakoopa.last.proportion$mean
difference.value <- wakoopa.late.value - wakoopa.value

# Never filled by this script; kept so the set of objects it defines is unchanged.
difference.standard.error <- NA

# Standard error of the difference: sqrt(var1/n1 + var2/n2).
wakoopa.standard.error <- sqrt( (wakoopa.proportion$variance/wakoopa.proportion$sample.size) + (wakoopa.last.proportion$variance/wakoopa.last.proportion$sample.size))
ttest <- difference.value/wakoopa.standard.error

# Degrees of freedom are taken as the smaller of the two sample sizes.
df.smaller.sample <- pmin(wakoopa.last.proportion$sample.size,wakoopa.proportion$sample.size)
# Two-sided p-value from the upper tail directly; this equals
# 2*(1 - pt(|t|, df)) without the cancellation that makes it exactly 0 for large |t|.
p.value <- 2*pt(q=abs(ttest),df=df.smaller.sample,lower.tail=FALSE)

out.frame <- data.frame(
	category = category,
	wakoopa = round(wakoopa.value,3),
	wakoopa.late = round(wakoopa.late.value,3),
	difference = round(difference.value,3),
	difference.se = round(wakoopa.standard.error,3),
	t.stat = round(ttest,digits=3),
	p.value = round(p.value,digits=3)
)

# Figure A3: same layout as figure A1, drawn from the current out.frame.
# NOTE(review): the x-axis label reads "Early Active Panel", but the plotted
# difference is late-active minus the whole toolbar sample, and `active.early`
# is never used in this script -- confirm which comparison is intended.
row.y <- length(out.frame$difference):1
# Heavy horizontal rules separating the variable groups named on the right axis.
group.breaks <- c(30.5,26.5,23.5,19.5,14.5,11.5,9.5,5.5,1.5)

pdf(file='figure-a3.pdf',height=10,width=9.5)
par(mar=c(5,8,4,6.75))
plot(
	x = out.frame$difference, y = row.y,
	xlim = c(-.3,.3), pch = 16, yaxt = 'n', ylab = '',
	xlab = "Difference (Late Active Panel - Early Active Panel)
Standard Deviations for Non-Binary Variables",
	main = "Over-Time Differences in Active Toolbar Sample",
	cex = 1.5, cex.main = 2, cex.axis = 1.2, cex.lab = 1.2
)
# Interval of +/- 2 standard errors around each point.
segments(
	y0 = row.y, y1 = row.y,
	x0 = out.frame$difference + 2*out.frame$difference.se,
	x1 = out.frame$difference - 2*out.frame$difference.se,
	lwd = 4
)
axis(side=2, at=row.y, labels=var.labels, las=1)
abline(v=0, lty=2, lwd=2)
# Dotted guide line through every row, then the group separators.
segments(y0=row.y, y1=row.y, x0=-1.5, x1=1.5, lty=3)
segments(y0=group.breaks, y1=group.breaks, x0=-1.5, x1=1.5, lwd=2.5)
axis(
	side = 4,
	at = c(1,3.5,7.5,10.5,13,17,21.5,25,28.5,32),
	labels = c('Vote','Therm','Issue','Turnout','Ideology','Partisan','Region','Education','Race','Demos'),
	las = 1, cex.axis = 1.2
)
dev.off()

# Stop diverting console output to the log file opened at the top of the script.
sink()